Hello

  • Analysis

In [6]:
import pandas as pd

# Read the semicolon-delimited CSV export into a DataFrame.
df = pd.read_csv("../dataset/git_demo_timestamp_linux.csv", sep=";")

# Row counts per distinct `author` value; show the ten most frequent.
rows_per_author = df["author"].value_counts()
rows_per_author.head(10)


Out[6]:
Linus Torvalds           24259
David S. Miller           9563
Mark Brown                6917
Takashi Iwai              6293
Al Viro                   6064
H Hartley Sweeten         5942
Ingo Molnar               5462
Mauro Carvalho Chehab     5384
Arnd Bergmann             5305
Greg Kroah-Hartman        4687
Name: author, dtype: int64

In [8]:
%matplotlib inline
df.author.value_counts().head(10).plot(kind='pie')


Out[8]:
<matplotlib.axes._subplots.AxesSubplot at 0x1bea50eeac8>

In [10]:
# Parse the `timestamp_local` column into datetimes and store it back on
# the frame so the `.dt` accessor is available afterwards.
parsed_timestamps = pd.to_datetime(df["timestamp_local"])
df["timestamp_local"] = parsed_timestamps

# Summary statistics for the frame's columns.
df.describe()


Out[10]:
timestamp_local author
count 723214 723213
unique 691746 17877
top 2017-11-01 03:56:19 Linus Torvalds
freq 137 24259
first 2005-04-16 15:20:36 NaN
last 2017-12-31 16:52:15 NaN

In [13]:
# Bar chart of the number of rows per calendar year of `timestamp_local`.
# value_counts(sort=False) only disables sorting by frequency; it does not
# order the result by year, so sort the index explicitly to keep the bars
# in chronological order.
df.timestamp_local.dt.year.value_counts(sort=False).sort_index().plot(kind='bar')


Out[13]:
<matplotlib.axes._subplots.AxesSubplot at 0x1bea51e3080>

In [15]:
# Bar chart of the number of rows per hour of day of `timestamp_local`.
# value_counts(sort=False) only disables sorting by frequency; it does not
# order the result by hour, so sort the index explicitly to keep the bars
# in ascending hour order.
df.timestamp_local.dt.hour.value_counts(sort=False).sort_index().plot(kind='bar')


Out[15]:
<matplotlib.axes._subplots.AxesSubplot at 0x1be9e950e10>